#### 0. Dependencies ####
library(stargazer)
library(tidyverse)
library(broom)
library(texreg)
library(xtable)
library(pals)
library(dummies)
library(ggridges)
library(ggpubr)
library(cowplot)
set.seed(89)

#### 0. Functions ####

source("conjoint_functions.R")

#### 1. Read-in conjoint data ####

global_data <- read_rds("data/nbh_clean_conjoint_global.rds")

mod_type <- "ols" # toggle to "logit" for logistic regression

#### 2. Country analysis ####

## Figure 1 -- pooled model results (unweighted)

pooled_model_wgtd <- list()

pooled_model_wgtd[[mod_type]] <- global_data %>%
  nest(data = everything()) %>% # A bit convoluted, but keeps formatting same as country mods
  mutate(coefs = map(data,
                     function (x) {
                       figure_results(x,
                                      formula = select ~ vulnerability + transmission + income + occupation + age_category,
                                      cluster = "country",
                                      type = mod_type,
                                      weights = FALSE)
                     })) %>% 
  select(-data) %>% 
  unnest(cols = coefs) %>% # combine all conjoint results into single table
  results_tidy(.) %>% # apply some formatting
  mutate(region = "Pooled",
         country = "Global")

# Pooled model plot
base_plot(data = pooled_model_wgtd[[mod_type]]) +
  aes(color = "firebrick2", height = 0.5) +
  facet_grid(attribute~., space = "free", scales = "free_y",) +
  labs(x = "AMCE", y = "", color = "") +
  scale_y_discrete(limits = rev(levels("term"))) +
  theme(legend.position = "none",
        text = element_text(size = 12),
        strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12)) +
  ggsave(paste0("figures/fig1.pdf"),
         width = 9, height = 14.5, units = "cm", scale = 1.5)

## Table S2
pooled_caption <- paste0(ifelse(mod_type == "logit",
                                "Logistic ",
                                "LPM (OLS) "), 
                         "Regression Results for the data pooled across the thirteen countries.. The dependent variable is the forced choice decision. ",
                         "These are the estimates used to construct the conjoint plots presented in ",
                         "Figure 1 of the main paper. ",
                         "Standard errors are clustered by country.")

global_data %>%
  clustered_mod(.,
                formula = select ~ vulnerability + transmission + income + occupation + age_category,
                cluster = "country",
                type = mod_type,
                weights = FALSE) %>% 
  table_make(results = .,
             type = mod_type,
             textsize = "scriptsize",
             caption = pooled_caption,
             label = "tab:pooled",
             file = "tables/pooled_model")


## Figure 2 -- Individual country results (weighted)

country_results_wgtd <- list()

country_results_wgtd[[mod_type]] <- global_data %>%
    group_by(country) %>% 
    nest() %>% # by here, created separate data.frames for each country
    mutate(coefs = map(data, # for each country, run conjoint model
                       function (x) {
                         figure_results(x,
                                      formula = select ~ vulnerability + transmission + income + occupation + age_category,
                                      cluster = "id",
                                      type = mod_type,
                                      weights = TRUE)
                       })) %>% 
    select(-data) %>% 
    unnest(cols = coefs) %>% # combine all conjoint results into single table
    results_tidy(.) %>% # apply some formatting
    mutate(region = case_when( # add in region
      country %in% c("Colombia","Brazil","Chile") ~ "South America",
      country %in% c("Canada","US") ~ "North America",
      country %in% c("Spain","UK","France","Italy") ~ "Europe",
      country %in% c("China","Australia", "India","Uganda") ~ "Asia/Africa"
    )) 


country_plot(country_results_wgtd, mod_type, "figures/fig2.pdf")

## Country-by-country combined results table
combined_caption <- paste0("Country ", ifelse(mod_type == "logit",
                                              "Logistic ",
                                              "LPM (OLS) "), 
                           "Regression Results. The dependent variable is the forced choice decision. ",
                           "These are the estimates used to construct the conjoint plots presented in ",
                           "Figure 2 of the main paper. ",
                           "Model weights are used to reflect the national populations of interest.")

## Table S3  
global_data %>%
  group_by(country) %>% 
  nest() %>% # by here, created separate data.frames for each country
  mutate(coefs = map(data, # for each country, run conjoint model
                     function (x) {
                       clustered_mod(x,
                                     formula = select ~ vulnerability + transmission + income + occupation + age_category,
                                     cluster = "id",
                                     type = mod_type,
                                     weights = TRUE)
                     })) %>% 
  select(-data) %>% 
  
  multi_table_make(., type = mod_type, textsize = "scriptsize", 
                   filename = "tables/conjoint_combined_results",
                   header = list("Country Model" = 1:length(.$country)),
                   model_names = .$country, sideways = TRUE,
                   caption = combined_caption,
                   label = "table:combined_weighted")

## Unweighted results
country_results <- list()
country_results[[mod_type]] <- global_data %>%
  group_by(country) %>% 
  nest() %>% # by here, created separate data.frames for each country
  mutate(coefs = map(data, # for each country, run conjoint model
                     function (x) {
                       figure_results(x,
                                      formula = select ~ vulnerability + transmission + income + occupation + age_category,
                                      cluster = "id",
                                      type = mod_type,
                                      weights = FALSE)
                     })) %>% 
  select(-data) %>% 
  unnest(cols = coefs) %>% # combine all conjoint results into single table
  results_tidy(.) %>% # apply some formatting
  mutate(region = case_when( # add in region
    country %in% c("Colombia","Brazil","Chile") ~ "South America",
    country %in% c("Canada","US") ~ "North America",
    country %in% c("Spain","UK","France","Italy") ~ "Europe",
    country %in% c("China","Australia", "India","Uganda") ~ "Asia/Africa"
  )) 

## Figure S2
country_plot(country_results, mod_type, "figures/conjoint_combined_results_UNWEIGHTED.pdf")

## Table S4
combined_caption <- paste0("Unweighted Country ", ifelse(mod_type == "logit",
                                              "Logistic ",
                                              "LPM (OLS) "), 
                           "Regression Results. The dependent variable is the Forced Choice decision. ",
                           "These are the estimates used to construct the conjoint plots presented in ",
                           "Figure \\ref{fig:combined_unweighted}",
                           ".")

global_data %>%
  group_by(country) %>% 
  nest() %>% # by here, created separate data.frames for each country
  mutate(coefs = map(data, # for each country, run conjoint model
                     function (x) {
                       clustered_mod(x,
                                     formula = select ~ vulnerability + transmission + income + occupation + age_category,
                                     cluster = "id",
                                     type = mod_type,
                                     weights = FALSE)
                     })) %>% 
  select(-data) %>% 
  
  multi_table_make(., type = mod_type, textsize = "scriptsize", 
                   filename = "tables/conjoint_combined_results_UNWEIGHTED",
                   header = list("Country Model" = 1:length(.$country)),
                   model_names = .$country, sideways = TRUE,
                   caption = combined_caption,
                   label = "combined_unweighted")

  
#### 3. Heterogeneity analysis ####
heterogeneity_results <- list()

het_age <- global_data %>% 
  filter(age > 17 & age < 120) %>% 
  mutate(age_bin = case_when(age < 40 ~ "18-39",
                             age < 60 ~ "40-59",
                             age < 120 ~ "60+")) %>% 
  group_by(age_bin)

het_inc <- global_data %>% 
  filter(!is.na(ind_inc)) %>% 
  group_by(ind_inc)

het_ideo <- global_data %>% 
  filter(!is.na(ideology)) %>% 
  mutate(ideo_bin = case_when(ideology <= 5 ~ "Left/Centre",
                              ideology > 5 ~ "Right")) %>% 
  group_by(ideo_bin)

het_gender <- global_data %>% 
  filter(!is.na(gender)) %>% 
  group_by(gender)

het_educ <- global_data %>% 
  filter(!is.na(education)) %>% 
  group_by(education)

het_hes <- global_data %>% 
  mutate(hesitancy = case_when(hes_covid_2 %in% c("Strongly agree","Agree") ~ "High",
                               hes_covid_2 %in% c("Strongly disagree","Disagree") ~ "Low",
                               hes_covid_2 == "Neither agree nor disagree" ~ "Moderate")) %>% 
  filter(!is.na(hesitancy)) %>% 
  group_by(hesitancy) 

heterogeneity_results[["unweighted"]] <- rbind(
    
    analyse_het(het_age, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = age_bin) %>% 
      mutate(covar = "Age",
             group = paste0("Age: ",group)),
    
    analyse_het(het_inc, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = ind_inc) %>% 
      mutate(covar = "Income",
             group = paste0("Income: ",group)),
    
    analyse_het(het_ideo, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = ideo_bin) %>% 
      mutate(covar = "Ideology",
             group = paste0("Ideology: ",group)),
    
    analyse_het(het_gender, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = gender) %>% 
      mutate(covar = "Gender",
             group = paste0("Gender: ",group)),
    
    analyse_het(het_educ, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = education) %>% 
      mutate(covar = "Education",
             group = paste0("Education: ",group)),
    
    analyse_het(het_hes, mod_type = mod_type, weights = FALSE) %>% 
      rename(group = hesitancy) %>% 
      mutate(covar = "Vaccine Concern",
             group = paste0("Vaccine Concern: ",group))
    
  )

het_groups <- c("Education","Gender","Ideology","Income","Age", "Vaccine Concern")

het_plot_data <- heterogeneity_results[["unweighted"]] %>% 
    mutate(group = factor(group, 
                          levels = c("Age: 18-39","Age: 40-59","Age: 60+",
                                     "Vaccine Concern: Low","Vaccine Concern: Moderate","Vaccine Concern: High",
                                     "Education: High","Education: Medium","Education: Low",
                                     "Gender: Female","Gender: Male","Gender: Other",
                                     "Ideology: Left/Centre","Ideology: Right",
                                     "Income: Low","Income: High")))

## Figure 3
het_plot(het_plot_data, c("Age","Ideology","Income"), "unweighted")

file.rename("figures/conjoint_heterogeneity_age_ide_inc_unweighted.pdf",
            "figures/fig3.pdf")

## Figure S3
het_plot(het_plot_data, c("Vaccine Concern","Education","Gender"), "unweighted")

# Heterogeneity tables

## Table S5
het_table(het_age, group_var = "age_bin",group_print = "Age", 
          mod_type = mod_type, model_order = c("18-39","40-59","60+"), weights = FALSE)

## Table S6
het_table(het_inc, group_var = "ind_inc",group_print = "Income", 
          mod_type = mod_type, model_order = c("Low","High"), weights = FALSE)

## Table S7
het_table(het_ideo, group_var = "ideo_bin",group_print = "Ideology", 
          mod_type = mod_type, model_order = c("Left/Centre","Right"), weights = FALSE)

## Table S8
het_table(het_hes, group_var = "hesitancy",group_print = "COVID-19 Vaccine Concern", 
          mod_type = mod_type, model_order = c("High","Moderate","Low"), weights = FALSE)

## Table S9
het_table(het_educ, group_var = "education",group_print = "Education", 
          mod_type = mod_type, model_order = c("High","Medium","Low"), weights = FALSE)

## Table S10
het_table(het_gender, group_var = "gender",group_print = "Gender", 
          mod_type = mod_type, model_order = c("Female","Male","Other"), weights = FALSE)

#### 4. WTP analysis ####

## Figure 4a -- mode of allocation
global_data %>% 
  select(id, country, wtp_access, weights) %>% 
  distinct() %>% 
  group_by(country) %>% 
  mutate(wgts = ifelse(country %in% c("Canada","Spain","India","Uganda"), 1/n(),weights)) %>% 
  group_by(country, wtp_access) %>% 
  summarise(prop = sum(wgts)) %>% 
  mutate(perc = round(prop*100,0),
         perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(fill=wtp_access, x=country, y = prop)) + 
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  scale_fill_discrete(name="Mode of allocation",
                      breaks=c("Do not know", 
                               "Vaccines are only available for private purchase", 
                               "Vaccines made available by government but citizens can pay privately to gain access",
                               "Vaccines only made available by government at low or no cost",
                               "Prefer not to say"),
                      labels=c("Do not know", 
                               "Private", 
                               "Government + Private",
                               "Government",
                               "Prefer not to say")) +
  theme_pubr() +
  theme(legend.position="right",
        text = element_text(size = 24)) +
  ggsave("figures/fig4a.pdf", width = 16, height = 7)


# Figure 4b -- willing to pay privately
global_data %>% 
  select(id, country, wtp_private, weights) %>% 
  distinct() %>% 
  group_by(country) %>% 
  mutate(wgts = ifelse(country %in% c("Canada","Spain","India","Uganda"), 1/n(),weights)) %>% 
  group_by(country, wtp_private) %>% 
  summarise(prop = sum(wgts)) %>% 
  mutate(perc = round(prop*100,0),
         perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(fill=wtp_private, x=country, y = prop)) + 
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  
  scale_fill_discrete(name="Willing to purchase\na COVID-19 vaccine\nfrom a private supplier",
                      breaks=c("Do not know", "No", "Yes","Prefer not to say"),
                      labels=c("Do not know", "No", "Yes","Prefer not to say")) + 
  theme_pubr() +
  theme(legend.position="right",
        text = element_text(size = 24)) +
  ggsave("figures/fig4b.pdf", width = 16, height = 7)

## Figure 4c -- Government mandated vaccine

global_data %>% 
  select(id, country, int_pol_implem_6, weights) %>% 
  distinct() %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  mutate(weights = ifelse(country %in% c("Canada","Spain","India","Uganda"), 1/n(),weights)) %>% 
  
  ggplot(aes(x = int_pol_implem_6, y = country, fill = country)) +
  geom_density_ridges(aes(height=..density..,
                          weight=weights),
                      stat = "density",
                      scale = 3, size = 0.4, alpha = 0.7) +
  scale_fill_manual(values=as.vector(polychrome(16)[3:16])) +
  labs(x = "Score (0 - 100)",
       y = "")+
  theme_pubr() +
  theme(legend.position = "none",
        legend.key.width = unit(4,"line"),
        text = element_text(size = 24),
        plot.title = element_text(hjust = 0.5)) +
  ggsave("figures/fig4c.pdf", width = 16, height = 11)


## Unweighted versions

## Figure S4a

global_data %>% 
  select(id, country, wtp_access) %>% 
  distinct() %>% 
  group_by(country, wtp_access) %>% 
  summarise(count = n()) %>% 
  group_by(country) %>% 
  summarise(prop = count/sum(count),
            perc = round(prop*100,0),
            wtp_access = wtp_access) %>% 
  mutate(perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(fill=wtp_access, x=country, y = prop)) + 
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  scale_fill_discrete(name="Mode of allocation",
                      breaks=c("Do not know", 
                               "Vaccines are only available for private purchase", 
                               "Vaccines made available by government but citizens can pay privately to gain access",
                               "Vaccines only made available by government at low or no cost",
                               "Prefer not to say"),
                      labels=c("Do not know", 
                               "Private", 
                               "Government + Private",
                               "Government",
                               "Prefer not to say")) +
  theme_pubr() +
  theme(legend.position="right",
        text = element_text(size = 24)) +
  ggsave("figures/willingness_to_pay_unweighted.pdf", width = 16, height = 7)

## Figure S4b
global_data %>% 
  select(id, country, wtp_private) %>% 
  distinct() %>% 
  group_by(country, wtp_private) %>% 
  summarise(count = n()) %>% 
  group_by(country) %>% 
  summarise(prop = count/sum(count),
            perc = round(prop*100,0),
            wtp_private = wtp_private) %>% 
  mutate(perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(fill=wtp_private, x=country, y = prop)) + 
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  
  scale_fill_discrete(name="Willing to purchase\na COVID-19 vaccine\nfrom a private supplier",
                      breaks=c("Do not know", "No", "Yes","Prefer not to say"),
                      labels=c("Do not know", "No", "Yes","Prefer not to say")) + 
  theme_pubr() +
  theme(legend.position="right",
        text = element_text(size = 24)) +
  ggsave("figures/wtp_private_unweighted.pdf", width = 16, height = 7)

## Figure S4c

global_data %>% 
  select(id, country, int_pol_implem_6) %>% 
  distinct() %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(x = int_pol_implem_6, y = country, fill = country)) +
  geom_density_ridges(scale = 3, size = 0.4, alpha = 0.7) +
  scale_fill_manual(values=as.vector(polychrome(16)[3:16])) +
  # scale_fill_gradientn(
  #   colours = c("#0D0887FF", "#CC4678FF", "#F0F921FF"),
  #   name = "",
  #   breaks=c(0,100),labels=c("0 Very Much Disagree","100 Very Much Agree")
  # ) +
  labs(x = "Score (0 - 100)",
       y = "")+
  theme_pubr() +
  theme(legend.position = "none",
        legend.key.width = unit(4,"line"),
        text = element_text(size = 24),
        plot.title = element_text(hjust = 0.5)) +
  ggsave("figures/distplot_final_unweighted.pdf", width = 16, height = 9)

## Ideology plots

## Figure S5

global_data %>% 
  mutate(ideo_bin = case_when(ideology <= 5 ~ "Left/Centre",
                              ideology > 5 ~ "Right")) %>% 
  filter(!is.na(ideo_bin)) %>% 
  select(id, country, ideo_bin, wtp_access) %>% 
  distinct() %>% 
  group_by(country, ideo_bin, wtp_access) %>% 
  summarise(count = n()) %>% 
  group_by(country, ideo_bin) %>% 
  summarise(prop = count/sum(count),
            perc = round(prop*100,0),
            wtp_access = wtp_access) %>% 
  mutate(perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = levels(country))) %>% 
  
  ggplot(aes(fill=wtp_access, x=ideo_bin, y = prop)) + 
  facet_wrap(country~., ncol = 1) +
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  scale_fill_discrete(name="Mode of allocation",
                      breaks=c("Do not know", 
                               "Vaccines are only available for private purchase", 
                               "Vaccines made available by government but citizens can pay privately to gain access",
                               "Vaccines only made available by government at low or no cost",
                               "Prefer not to say"),
                      labels=c("Do not know", 
                               "Private", 
                               "Government + Private",
                               "Government",
                               "Prefer not to say")) +
  theme_pubr() +
  theme(legend.position="bottom",
        text = element_text(size = 24)) + 
  ggsave("figures/willingness_to_pay_unweighted_ideology.pdf", width = 16, height = 16)

## Figure S6
global_data %>% 
  mutate(ideo_bin = case_when(ideology <= 5 ~ "Left/Centre",
                              ideology > 5 ~ "Right")) %>% 
  filter(!is.na(ideo_bin)) %>% 
  select(id, country, ideo_bin, wtp_private) %>% 
  distinct() %>% 
  group_by(country, ideo_bin, wtp_private) %>% 
  summarise(count = n()) %>% 
  group_by(country, ideo_bin) %>% 
  summarise(prop = count/sum(count),
            perc = round(prop*100,0),
            wtp_private = wtp_private) %>% 
  mutate(perc = ifelse(perc == 0, "",as.character(perc))) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = levels(country))) %>% 
  
  ggplot(aes(fill=wtp_private, x=ideo_bin, y = prop)) + 
  facet_wrap(.~country, ncol = 1) +
  geom_col(width = 0.7) +
  geom_text(aes(label = perc), position = position_fill(vjust = 0.5), size = 7) +
  labs(y = "Percentage of Respondents", x = "") +
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip() + 
  
  scale_fill_discrete(name="Willing to purchase a COVID-19\nvaccine from a private supplier",
                      breaks=c("Do not know", "No", "Yes","Prefer not to say"),
                      labels=c("Do not know", "No", "Yes","Prefer not to say")) + 
  theme_pubr() +
  theme(legend.position="bottom",
        text = element_text(size = 24)) +
  ggsave("figures/wtp_private_unweighted_ideology.pdf", width = 16, height = 16)

## Figure S7

global_data %>% 
  mutate(ideo_bin = case_when(ideology <= 5 ~ "Left/Centre",
                              ideology > 5 ~ "Right")) %>% 
  filter(!is.na(ideo_bin)) %>% 
  select(id, country, ideo_bin, int_pol_implem_6) %>% 
  distinct() %>% 
  group_by(country, ideo_bin) %>% 
  summarise(avg = mean(int_pol_implem_6, na.rm = TRUE),
  ) %>% 
  mutate(country = as.factor(country)) %>% 
  mutate(country = factor(country, levels = rev(levels(country)))) %>% 
  
  ggplot(aes(x = avg, y = country, fill = ideo_bin)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = c("firebrick2","dodgerblue2")) +
  labs(x = "Average score",
       y = "",
       fill = "Ideology") +
  theme_pubr() +
  theme(legend.position = "bottom",
        legend.key.width = unit(4,"line"),
        text = element_text(size = 24),
        plot.title = element_text(hjust = 0.5)) +
  ggsave("figures/distplot_final_unweighted_ideology.pdf", width = 16, height = 14)


## WGI Indicator correlations

WGI_data <- global_data %>% 
  select(id, country, wtp_access) %>% 
  distinct() %>% 
  group_by(country, wtp_access) %>% 
  summarise(count = n()) %>% 
  group_by(country) %>% 
  summarise(prop = count/sum(count),
            perc = round(prop*100,0),
            wtp_access = wtp_access) %>% 
  filter(wtp_access %in% c("Vaccines are only available for private purchase",
                           "Vaccines made available by government but citizens can pay privately to gain access")) %>% 
  group_by(country) %>% 
  summarise(private = sum(prop)) %>% 
  left_join(read_csv("data/wgi_indicator.csv"), by = "country") %>% 
  mutate(estimate_2019 = as.numeric(estimate_2019))

cor.test(WGI_data$private, WGI_data$estimate_2019)
cor.test(WGI_data[WGI_data$country != "Uganda",]$private, WGI_data[WGI_data$country != "Uganda",]$estimate_2019)

  
#### 5. Summary statistic tables ####

## Randomisation of attribute-levels

rand_tab <- global_data %>%
  select("vulnerability", "transmission", "income", "occupation", "age_category") %>% 
  as.data.frame(.) %>% 
  dummy.data.frame(.) 

vars <- colnames(rand_tab) %>% 
  gsub("vulnerability|transmission|occupation|age_category","",.) %>% 
  ifelse(str_starts(.,"income"), str_sub(.,7), .)


## Table S1
sum_conjoint <- data.frame(
  
  `Conjoint attribute variable` = vars,
  `Mean` = round(as.double(colMeans(rand_tab)),2),
  `SD` = round(as.double(apply(rand_tab, 2, sd)),2),
  `Min.` = 0,
  `Max.` = 1,
  `N` = as.character(as.numeric(colSums(rand_tab))),
  
  check.names = FALSE
  
  ) %>% 
  
  xtable(.,
         caption = "Combined Global Survey: Summary of Conjoint Attribute Randomisation",
         label = "tab:randomisation",
         digits = 2) %>% 
  print(., 
        include.rownames = FALSE,
        floating.environment = "sidewaystable",
        file = "tables/summary_tab_randomisation.tex")


#### 6. Experimental tables included in Section 3 of SI ####

## Table S13
summary_data <- global_data %>% 
  mutate(#ideology = ifelse(country == "China",0,ideology), # Prevent China being filtered out, as q. not asked
         age_bin = case_when(age < 40 ~ "18-39",
                             age < 60 ~ "40-59",
                             age < 120 ~ "60+"),
         ideo_bin = case_when(ideology <= 5 ~ "Left/Centre",
                              ideology > 5 ~ "Right"),
         hesitancy = case_when(hes_covid_2 %in% c("Strongly agree","Agree") ~ "High",
                               hes_covid_2 %in% c("Strongly disagree","Disagree") ~ "Low",
                               hes_covid_2 == "Neither agree nor disagree" ~ "Moderate")
         )

sumtab_1 <- cbind(
    
  {summary_data %>% filter(age > 17 & age < 120) %>% 
      group_by(country) %>% 
      summarise(`18-39` = 100*sum(age_bin == "18-39", na.rm=TRUE)/n(),
                `40-59` = 100*sum(age_bin == "40-59", na.rm=TRUE)/n(),
                `60+` = 100*sum(age_bin == "60+", na.rm=TRUE)/n()) %>% 
      mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%")))},
    
    {summary_data %>% filter(!is.na(ideology) | country == "China") %>% 
        group_by(country) %>% 
        summarise(`Left/Centre` = 100*sum(ideo_bin == "Left/Centre",na.rm=TRUE)/n(),
                  Right = 100*sum(ideo_bin == "Right",na.rm=TRUE)/n()) %>% 
        mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%"))) %>% 
        select(-country)},
    
    {summary_data %>% filter(!is.na(ind_inc)) %>% 
        group_by(country) %>% 
        summarise(Inc_Low = 100*sum(ind_inc == "Low",na.rm=TRUE)/n(),
                  Inc_High = 100*sum(ind_inc == "High",na.rm=TRUE)/n()) %>% 
        mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%"))) %>% 
        select(-country)}
  ) %>% 
  xtable(.) %>% print(only.contents = TRUE, 
                      include.rownames = FALSE,
                      include.colnames = FALSE,
                      hline.after = c(nrow(.)),
                      file = "tables/summary_tab_age_ideo_inc.tex")

## Table S14
sumtab_2 <- cbind(
  
  {summary_data %>% filter(!is.na(hesitancy)) %>% 
      group_by(country) %>% 
      summarise(Hes_Low = 100*sum(hesitancy == "Low",na.rm=TRUE)/n(),
                Hes_Moderate = 100*sum(hesitancy == "Moderate",na.rm=TRUE)/n(),
                Hes_High = 100*sum(hesitancy == "High",na.rm=TRUE)/n()) %>% 
      mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%")))},
  
    {summary_data %>% filter(!is.na(education)) %>% 
        group_by(country) %>% 
        summarise(Ed_Low = 100*sum(education == "Low",na.rm=TRUE)/n(),
                  Ed_Medium = 100*sum(education == "Medium",na.rm=TRUE)/n(),
                  Ed_High = 100*sum(education == "High",na.rm=TRUE)/n()) %>% 
        mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%"))) %>% 
      select(-country)},
  
  {summary_data %>% filter(!is.na(gender)) %>% 
      group_by(country) %>% 
      summarise(Female = 100*sum(gender == "Female", na.rm=TRUE)/n(),
                Male = 100*sum(gender == "Male", na.rm=TRUE)/n(),
                Other = 100*sum(gender == "Other", na.rm=TRUE)/n()) %>% 
      mutate(across(where(is.numeric), function (x) paste0(round(x,1),"%"))) %>% 
      select(-country)}
) %>% 
  xtable(.) %>% print(only.contents = TRUE, 
                      include.rownames = FALSE,
                      include.colnames = FALSE,
                      hline.after = c(nrow(.)),
                      file = "tables/summary_tab_hes_edu_gen.tex")

## Population-Sample Comparison
## Table S15
pop_comparison <- read_csv("data/population_comparison.csv")

samp_comparison <- cbind(
  
  {summary_data %>% filter(age > 17 & age < 120) %>% 
      group_by(country) %>% 
      summarise(`18-39` = 100*sum(age_bin == "18-39", na.rm=TRUE)/n(),
                `40-59` = 100*sum(age_bin == "40-59", na.rm=TRUE)/n(),
                `60+` = 100*sum(age_bin == "60+", na.rm=TRUE)/n())},
  
  {summary_data %>% filter(!is.na(gender)) %>% 
      group_by(country) %>% 
      summarise(Female = 100*sum(gender == "Female", na.rm=TRUE)/n(),
                Male = 100*sum(gender == "Male", na.rm=TRUE)/n(),
                Other = 100*sum(gender == "Other", na.rm=TRUE)/n()) %>% 
      select(-country)},
  
  {summary_data %>% filter(!is.na(education)) %>% 
      group_by(country) %>% 
      summarise(Ed_Low = 100*sum(education == "Low",na.rm=TRUE)/n(),
                Ed_Medium = 100*sum(education == "Medium",na.rm=TRUE)/n(),
                Ed_High = 100*sum(education == "High",na.rm=TRUE)/n()) %>% 
      select(-country)})

# Ensure common order
country_order <- c("Australia","Brazil","Canada","Chile","China","Colombia","France","India","Italy","Spain","Uganda","UK", "US")
col_order <- c("18-39","40-59","60+","Female","Male","Ed_High","Ed_Medium","Ed_Low")

pop_samp_comp <- samp_comparison[which(samp_comparison$country == country_order),col_order]
pop_comparison <- pop_comparison[which(pop_comparison$country == country_order),col_order]

for (i in 1:nrow(pop_samp_comp)) {
  for (j in 1:ncol(pop_samp_comp)) {
    
    pop_samp_comp[i,j] <- paste0(round(as.numeric(pop_samp_comp[i,j]),0),
                                 "/",
                                 round(as.numeric(pop_comparison[i,j]),0)) 
    
  }
}

pop_samp_comp <- cbind(country = country_order, pop_samp_comp) %>% 
  xtable(.) %>% print(only.contents = TRUE, 
                      include.rownames = FALSE,
                      include.colnames = FALSE,
                      hline.after = c(nrow(.)),
                      file = "tables/pop_samp_comparison.tex")



